# Writes infos.csv: selected fields of every movie record that has an id
import pandas as pd
from mongo import (
  collection_movie_info,
  get_all_data
)
from utils import (
  store_data_to_csv,
  log_message
)
def create_info_csv(output_path: str = '../data/llm-pretrain-data/infos.csv') -> None:
  """Export selected movie fields to a CSV file.

  Reads every record returned by ``get_all_data(collection_movie_info)``,
  skips records that have no ``'id'`` key, and writes one CSV row per
  remaining record with the columns ``movieId``, ``overview``, ``revenue``,
  ``release_date``, ``vote_average``, ``popularity`` and ``vote_count``.

  Args:
    output_path: Destination CSV path. Defaults to the location the script
      has always written to; a relative path is resolved against the current
      working directory.

  Raises:
    KeyError: If a record that has an ``'id'`` lacks one of the exported
      fields (assumes records are dict-like -- confirm against
      ``get_all_data``).
  """
  # 'title' is intentionally not exported: it already exists in the
  # training and test sets.
  exported_fields = (
    'overview',
    'revenue',
    'release_date',
    'vote_average',
    'popularity',
    'vote_count',
  )
  rows = [
    {'movieId': item['id'], **{field: item[field] for field in exported_fields}}
    for item in get_all_data(collection_movie_info)
    if 'id' in item
  ]
  # Passing the columns explicitly keeps the header row in the CSV even when
  # no record qualifies; without it an empty export would have no header.
  final_result = pd.DataFrame(rows, columns=['movieId', *exported_fields])
  final_result.to_csv(output_path, index=False)
  log_message("文件存储完毕")

# Script entry point: this call is unguarded, so the export runs whenever the
# module is executed or imported.
create_info_csv()